Setup¶

Path to training data on drive¶

In [ ]:
path_folder = "../splits_final_deblurred/"

Imports¶

In [ ]:
from __future__ import print_function
import tensorflow as tf
from tensorflow import keras
from keras.datasets import mnist
#import tensorflow.keras.backend as K
from matplotlib import pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Flatten, Activation, concatenate, BatchNormalization
import matplotlib.pyplot as plt
import numpy as np
from keras.activations import softmax
from keras.layers import concatenate
from keras.layers import UpSampling2D
from keras.layers import Dropout
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input
from keras.models import Model
import sys
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Softmax
import os
from PIL import Image

Helper Functions¶

In [ ]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

num_classes = 5
class_names = ["background", "rigid_plastic", "cardboard", "metal", "soft_plastic"]
class_colors = [[0,0,0], [0,0,255], [0,255,0], [255,0,0] ,[125,0,125]]

def Make_Overlapping_Plot2(image, label=None, pred=None):
    '''
    Makes one/two/three plots, first is the image, the others are segmentation/predicitons combined with image where background is unchanged, all other classes
    have a color.
    \n RED: Rigid Plastic
    \n GREEN: Cardboard
    \n BLUE: Metal
    \n PURPLE: Soft plastic.
    '''
    _, ax1 = plt.subplots()
    ax1.set_title("Image")
    ax1.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    if label is not None:
      red = np.where(label[:,:,1] == 1)
      green = np.where(label[:,:,2] == 1)
      blue = np.where(label[:,:,3] == 1)
      purple = np.where(label[:,:,4] == 1)
      image2 = np.copy(image)
      image2[red] = class_colors[1]
      image2[green] = class_colors[2]
      image2[blue] = class_colors[3]
      image2[purple] = class_colors[4]
      _, ax2 =plt.subplots()
      ax2.set_title("Overlapping Labels")
      ax2.imshow(cv2.cvtColor(image2, cv2.COLOR_BGR2RGB))

    if pred is not None:
      image3 = np.copy(image)
      red = np.where(pred[:,:,1] >= 0.5)
      green = np.where(pred[:,:,2] >= 0.5)
      blue = np.where(pred[:,:,3] >= 0.5)
      purple = np.where(pred[:,:,4] >= 0.5)
      image3[red] = class_colors[1]
      image3[green] = class_colors[2]
      image3[blue] = class_colors[3]
      image3[purple] = class_colors[4]
      _, ax3 =plt.subplots()
      ax3.set_title("predictions")
      ax3.imshow(cv2.cvtColor(image3, cv2.COLOR_BGR2RGB))

    plt.show()
    return 0

from tensorflow.keras.utils import Sequence
import numpy as np
import os
from PIL import Image

def create_y_labels(loaded_labels, y_labels):
  if loaded_labels.ndim == 3:
    for i in range(0,y_labels.shape[0]):
      background = np.where(loaded_labels[i,:,:] == 0)
      red = np.where(loaded_labels[i,:,:] == 1)
      green = np.where(loaded_labels[i,:,:] == 2)
      blue = np.where(loaded_labels[i,:,:] == 3)
      purple = np.where(loaded_labels[i,:,:] == 4)
      y_labels[i,:,:,0][background] = 1 #Background
      y_labels[i,:,:,1][red] = 1        #Rigid Plast
      y_labels[i,:,:,2][green] = 1      #Cardboard
      y_labels[i,:,:,3][blue] = 1       #Metal
      y_labels[i,:,:,4][purple] = 1     #SoftPlast
  else:
    for i in range(0,y_labels.shape[0]):
      background = np.where(loaded_labels[i,:,:,0] == 0)
      red = np.where(loaded_labels[i,:,:,0] == 1)
      green = np.where(loaded_labels[i,:,:,0] == 2)
      blue = np.where(loaded_labels[i,:,:,0] == 3)
      purple = np.where(loaded_labels[i,:,:,0] == 4)
      y_labels[i,:,:,0][background] = 1 #Background
      y_labels[i,:,:,1][red] = 1        #Rigid Plast
      y_labels[i,:,:,2][green] = 1      #Cardboard
      y_labels[i,:,:,3][blue] = 1       #Metal
      y_labels[i,:,:,4][purple] = 1     #SoftPlast
  return y_labels

VGG16-UNet Experiment¶

Model¶

Loss function

In [ ]:
import keras.backend as K

def dice_coef(y_true, y_pred):
  y_true = tf.reshape(tf.cast(y_true, tf.float64), [-1])
  y_pred = tf.reshape(tf.cast(y_pred, tf.float64), [-1])
  intersection = K.sum(y_true * y_pred)
  union = K.sum(y_true) + K.sum(y_pred)
  epsilon = 1e-4
  return (2*intersection + epsilon)/(union+epsilon)

def dice_loss_function(y_true, y_pred):
  coef = 0.
  for i in range(5):
    coef += dice_coef(y_true[:,:,:,i], y_pred[:,:,:,i])
  return 1-coef/5.

Similar to the model described in https://qims.amegroups.org/article/view/91409/pdf , page 3143, figure 3(b)

In [ ]:
from keras.layers import concatenate, BatchNormalization
from keras.layers import UpSampling2D
from keras.activations import softmax
from keras.applications.vgg16 import preprocess_input
input_dim = (224, 224)

# See last layer of network
def softMaxAxis3(x):
    return softmax(x,axis=3)

def my_conv(x,filters,kernel_size=3,padding='same',kernel_initializer='he_normal'):
  x = Conv2D(filters, kernel_size, padding=padding, kernel_initializer=kernel_initializer, strides=1)(x)
  #x = BatchNormalization()(x)
  x = Activation('relu')(x)
  return x

def apply_tensor_to_layers(x, layers):
  for l in layers:
    x = l(x)
  return x

vgg16 = keras.applications.VGG16(weights="imagenet",input_shape=(*input_dim, 3), include_top=False)
inputs = keras.Input(shape=(*input_dim, 3))
x = preprocess_input(inputs)  #shifts from RBG to BGR and zero centers pixels, BGR mean values = [-103.93900299, -116.77899933, -123.68000031]
x = vgg16.layers[1](x, training=False)

# create vgg16 encoder while remembering connections for concatenation
con1 = apply_tensor_to_layers(x, vgg16.layers[2:3])
con2 = apply_tensor_to_layers(con1, vgg16.layers[3:6])
con3 = apply_tensor_to_layers(con2, vgg16.layers[6:10])
con4 = apply_tensor_to_layers(con3, vgg16.layers[10:14])
con5 = apply_tensor_to_layers(con4, vgg16.layers[14:18])
encoder =  apply_tensor_to_layers(con5, vgg16.layers[18:19])
print(con5)
# decoder
up1 = my_conv(encoder, 1024)
up1 = my_conv(up1, 512)
up1 = UpSampling2D((2,2))(up1)
concat5 = concatenate([up1, con5] ,axis=3)

up2 = my_conv(concat5, filters=512)
up2 = my_conv(up2, filters=512)
up2 = UpSampling2D((2,2))(up2)
concat4 = concatenate([up2, con4], axis=3)

up3 = my_conv(concat4, filters=256)
up3 = my_conv(up3, filters=256)
up3 = UpSampling2D((2,2))(up3)
concat3 = concatenate([up3, con3], axis=3)

up4 = my_conv(concat3, filters=128)
up4 = my_conv(up4, filters=128)
up4 = UpSampling2D((2,2))(up4)
concat2 = concatenate([up4, con2], axis=3)

up5 = my_conv(concat2, filters=64)
up5 = my_conv(up5, filters=64)
up5 = UpSampling2D((2,2))(up5)
concat1 = concatenate([up5, con1], axis=3)

decoder = my_conv(concat1, filters=16)
decoder = Conv2D(num_classes, 1, activation = softMaxAxis3)(decoder)

# Model
model = Model(inputs, decoder)

#set vgg16 layers to not trainable
for i in range(19):
  model.layers[i].trainable = False

#, weight_decay=1e-6, momentum=0.9, nesterov=True
model.compile(optimizer = tf.keras.optimizers.legacy.SGD(learning_rate = 0.01), loss = dice_loss_function)
model.summary()
2023-12-07 12:52:29.341794: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2023-12-07 12:52:29.341818: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2023-12-07 12:52:29.341822: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2023-12-07 12:52:29.341862: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-07 12:52:29.341878: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
KerasTensor(type_spec=TensorSpec(shape=(None, 14, 14, 512), dtype=tf.float32, name=None), name='block5_conv3/Relu:0', description="created by layer 'block5_conv3'")
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
==================================================================================================
 input_2 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 tf.__operators__.getitem (  (None, 224, 224, 3)          0         ['input_2[0][0]']             
 SlicingOpLambda)                                                                                 
                                                                                                  
 tf.nn.bias_add (TFOpLambda  (None, 224, 224, 3)          0         ['tf.__operators__.getitem[0][
 )                                                                  0]']                          
                                                                                                  
 block1_conv1 (Conv2D)       (None, 224, 224, 64)         1792      ['tf.nn.bias_add[0][0]']      
                                                                                                  
 block1_conv2 (Conv2D)       (None, 224, 224, 64)         36928     ['block1_conv1[1][0]']        
                                                                                                  
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)         0         ['block1_conv2[1][0]']        
                                                                                                  
 block2_conv1 (Conv2D)       (None, 112, 112, 128)        73856     ['block1_pool[1][0]']         
                                                                                                  
 block2_conv2 (Conv2D)       (None, 112, 112, 128)        147584    ['block2_conv1[1][0]']        
                                                                                                  
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)          0         ['block2_conv2[1][0]']        
                                                                                                  
 block3_conv1 (Conv2D)       (None, 56, 56, 256)          295168    ['block2_pool[1][0]']         
                                                                                                  
 block3_conv2 (Conv2D)       (None, 56, 56, 256)          590080    ['block3_conv1[1][0]']        
                                                                                                  
 block3_conv3 (Conv2D)       (None, 56, 56, 256)          590080    ['block3_conv2[1][0]']        
                                                                                                  
 block3_pool (MaxPooling2D)  (None, 28, 28, 256)          0         ['block3_conv3[1][0]']        
                                                                                                  
 block4_conv1 (Conv2D)       (None, 28, 28, 512)          1180160   ['block3_pool[1][0]']         
                                                                                                  
 block4_conv2 (Conv2D)       (None, 28, 28, 512)          2359808   ['block4_conv1[1][0]']        
                                                                                                  
 block4_conv3 (Conv2D)       (None, 28, 28, 512)          2359808   ['block4_conv2[1][0]']        
                                                                                                  
 block4_pool (MaxPooling2D)  (None, 14, 14, 512)          0         ['block4_conv3[1][0]']        
                                                                                                  
 block5_conv1 (Conv2D)       (None, 14, 14, 512)          2359808   ['block4_pool[1][0]']         
                                                                                                  
 block5_conv2 (Conv2D)       (None, 14, 14, 512)          2359808   ['block5_conv1[1][0]']        
                                                                                                  
 block5_conv3 (Conv2D)       (None, 14, 14, 512)          2359808   ['block5_conv2[1][0]']        
                                                                                                  
 block5_pool (MaxPooling2D)  (None, 7, 7, 512)            0         ['block5_conv3[1][0]']        
                                                                                                  
 conv2d (Conv2D)             (None, 7, 7, 1024)           4719616   ['block5_pool[1][0]']         
                                                                                                  
 activation (Activation)     (None, 7, 7, 1024)           0         ['conv2d[0][0]']              
                                                                                                  
 conv2d_1 (Conv2D)           (None, 7, 7, 512)            4719104   ['activation[0][0]']          
                                                                                                  
 activation_1 (Activation)   (None, 7, 7, 512)            0         ['conv2d_1[0][0]']            
                                                                                                  
 up_sampling2d (UpSampling2  (None, 14, 14, 512)          0         ['activation_1[0][0]']        
 D)                                                                                               
                                                                                                  
 concatenate (Concatenate)   (None, 14, 14, 1024)         0         ['up_sampling2d[0][0]',       
                                                                     'block5_conv3[1][0]']        
                                                                                                  
 conv2d_2 (Conv2D)           (None, 14, 14, 512)          4719104   ['concatenate[0][0]']         
                                                                                                  
 activation_2 (Activation)   (None, 14, 14, 512)          0         ['conv2d_2[0][0]']            
                                                                                                  
 conv2d_3 (Conv2D)           (None, 14, 14, 512)          2359808   ['activation_2[0][0]']        
                                                                                                  
 activation_3 (Activation)   (None, 14, 14, 512)          0         ['conv2d_3[0][0]']            
                                                                                                  
 up_sampling2d_1 (UpSamplin  (None, 28, 28, 512)          0         ['activation_3[0][0]']        
 g2D)                                                                                             
                                                                                                  
 concatenate_1 (Concatenate  (None, 28, 28, 1024)         0         ['up_sampling2d_1[0][0]',     
 )                                                                   'block4_conv3[1][0]']        
                                                                                                  
 conv2d_4 (Conv2D)           (None, 28, 28, 256)          2359552   ['concatenate_1[0][0]']       
                                                                                                  
 activation_4 (Activation)   (None, 28, 28, 256)          0         ['conv2d_4[0][0]']            
                                                                                                  
 conv2d_5 (Conv2D)           (None, 28, 28, 256)          590080    ['activation_4[0][0]']        
                                                                                                  
 activation_5 (Activation)   (None, 28, 28, 256)          0         ['conv2d_5[0][0]']            
                                                                                                  
 up_sampling2d_2 (UpSamplin  (None, 56, 56, 256)          0         ['activation_5[0][0]']        
 g2D)                                                                                             
                                                                                                  
 concatenate_2 (Concatenate  (None, 56, 56, 512)          0         ['up_sampling2d_2[0][0]',     
 )                                                                   'block3_conv3[1][0]']        
                                                                                                  
 conv2d_6 (Conv2D)           (None, 56, 56, 128)          589952    ['concatenate_2[0][0]']       
                                                                                                  
 activation_6 (Activation)   (None, 56, 56, 128)          0         ['conv2d_6[0][0]']            
                                                                                                  
 conv2d_7 (Conv2D)           (None, 56, 56, 128)          147584    ['activation_6[0][0]']        
                                                                                                  
 activation_7 (Activation)   (None, 56, 56, 128)          0         ['conv2d_7[0][0]']            
                                                                                                  
 up_sampling2d_3 (UpSamplin  (None, 112, 112, 128)        0         ['activation_7[0][0]']        
 g2D)                                                                                             
                                                                                                  
 concatenate_3 (Concatenate  (None, 112, 112, 256)        0         ['up_sampling2d_3[0][0]',     
 )                                                                   'block2_conv2[1][0]']        
                                                                                                  
 conv2d_8 (Conv2D)           (None, 112, 112, 64)         147520    ['concatenate_3[0][0]']       
                                                                                                  
 activation_8 (Activation)   (None, 112, 112, 64)         0         ['conv2d_8[0][0]']            
                                                                                                  
 conv2d_9 (Conv2D)           (None, 112, 112, 64)         36928     ['activation_8[0][0]']        
                                                                                                  
 activation_9 (Activation)   (None, 112, 112, 64)         0         ['conv2d_9[0][0]']            
                                                                                                  
 up_sampling2d_4 (UpSamplin  (None, 224, 224, 64)         0         ['activation_9[0][0]']        
 g2D)                                                                                             
                                                                                                  
 concatenate_4 (Concatenate  (None, 224, 224, 128)        0         ['up_sampling2d_4[0][0]',     
 )                                                                   'block1_conv2[1][0]']        
                                                                                                  
 conv2d_10 (Conv2D)          (None, 224, 224, 16)         18448     ['concatenate_4[0][0]']       
                                                                                                  
 activation_10 (Activation)  (None, 224, 224, 16)         0         ['conv2d_10[0][0]']           
                                                                                                  
 conv2d_11 (Conv2D)          (None, 224, 224, 5)          85        ['activation_10[0][0]']       
                                                                                                  
==================================================================================================
Total params: 35122469 (133.98 MB)
Trainable params: 22767589 (86.85 MB)
Non-trainable params: 12354880 (47.13 MB)
__________________________________________________________________________________________________
In [ ]:
vgg16.summary()
model.summary()
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 56, 56, 256)       295168    
                                                                 
 block3_conv2 (Conv2D)       (None, 56, 56, 256)       590080    
                                                                 
 block3_conv3 (Conv2D)       (None, 56, 56, 256)       590080    
                                                                 
 block3_pool (MaxPooling2D)  (None, 28, 28, 256)       0         
                                                                 
 block4_conv1 (Conv2D)       (None, 28, 28, 512)       1180160   
                                                                 
 block4_conv2 (Conv2D)       (None, 28, 28, 512)       2359808   
                                                                 
 block4_conv3 (Conv2D)       (None, 28, 28, 512)       2359808   
                                                                 
 block4_pool (MaxPooling2D)  (None, 14, 14, 512)       0         
                                                                 
 block5_conv1 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_conv2 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_conv3 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_pool (MaxPooling2D)  (None, 7, 7, 512)         0         
                                                                 
=================================================================
Total params: 14714688 (56.13 MB)
Trainable params: 2359808 (9.00 MB)
Non-trainable params: 12354880 (47.13 MB)
_________________________________________________________________
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
==================================================================================================
 input_2 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 tf.__operators__.getitem (  (None, 224, 224, 3)          0         ['input_2[0][0]']             
 SlicingOpLambda)                                                                                 
                                                                                                  
 tf.nn.bias_add (TFOpLambda  (None, 224, 224, 3)          0         ['tf.__operators__.getitem[0][
 )                                                                  0]']                          
                                                                                                  
 block1_conv1 (Conv2D)       (None, 224, 224, 64)         1792      ['tf.nn.bias_add[0][0]']      
                                                                                                  
 block1_conv2 (Conv2D)       (None, 224, 224, 64)         36928     ['block1_conv1[1][0]']        
                                                                                                  
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)         0         ['block1_conv2[1][0]']        
                                                                                                  
 block2_conv1 (Conv2D)       (None, 112, 112, 128)        73856     ['block1_pool[1][0]']         
                                                                                                  
 block2_conv2 (Conv2D)       (None, 112, 112, 128)        147584    ['block2_conv1[1][0]']        
                                                                                                  
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)          0         ['block2_conv2[1][0]']        
                                                                                                  
 block3_conv1 (Conv2D)       (None, 56, 56, 256)          295168    ['block2_pool[1][0]']         
                                                                                                  
 block3_conv2 (Conv2D)       (None, 56, 56, 256)          590080    ['block3_conv1[1][0]']        
                                                                                                  
 block3_conv3 (Conv2D)       (None, 56, 56, 256)          590080    ['block3_conv2[1][0]']        
                                                                                                  
 block3_pool (MaxPooling2D)  (None, 28, 28, 256)          0         ['block3_conv3[1][0]']        
                                                                                                  
 block4_conv1 (Conv2D)       (None, 28, 28, 512)          1180160   ['block3_pool[1][0]']         
                                                                                                  
 block4_conv2 (Conv2D)       (None, 28, 28, 512)          2359808   ['block4_conv1[1][0]']        
                                                                                                  
 block4_conv3 (Conv2D)       (None, 28, 28, 512)          2359808   ['block4_conv2[1][0]']        
                                                                                                  
 block4_pool (MaxPooling2D)  (None, 14, 14, 512)          0         ['block4_conv3[1][0]']        
                                                                                                  
 block5_conv1 (Conv2D)       (None, 14, 14, 512)          2359808   ['block4_pool[1][0]']         
                                                                                                  
 block5_conv2 (Conv2D)       (None, 14, 14, 512)          2359808   ['block5_conv1[1][0]']        
                                                                                                  
 block5_conv3 (Conv2D)       (None, 14, 14, 512)          2359808   ['block5_conv2[1][0]']        
                                                                                                  
 block5_pool (MaxPooling2D)  (None, 7, 7, 512)            0         ['block5_conv3[1][0]']        
                                                                                                  
 conv2d (Conv2D)             (None, 7, 7, 1024)           4719616   ['block5_pool[1][0]']         
                                                                                                  
 activation (Activation)     (None, 7, 7, 1024)           0         ['conv2d[0][0]']              
                                                                                                  
 conv2d_1 (Conv2D)           (None, 7, 7, 512)            4719104   ['activation[0][0]']          
                                                                                                  
 activation_1 (Activation)   (None, 7, 7, 512)            0         ['conv2d_1[0][0]']            
                                                                                                  
 up_sampling2d (UpSampling2  (None, 14, 14, 512)          0         ['activation_1[0][0]']        
 D)                                                                                               
                                                                                                  
 concatenate (Concatenate)   (None, 14, 14, 1024)         0         ['up_sampling2d[0][0]',       
                                                                     'block5_conv3[1][0]']        
                                                                                                  
 conv2d_2 (Conv2D)           (None, 14, 14, 512)          4719104   ['concatenate[0][0]']         
                                                                                                  
 activation_2 (Activation)   (None, 14, 14, 512)          0         ['conv2d_2[0][0]']            
                                                                                                  
 conv2d_3 (Conv2D)           (None, 14, 14, 512)          2359808   ['activation_2[0][0]']        
                                                                                                  
 activation_3 (Activation)   (None, 14, 14, 512)          0         ['conv2d_3[0][0]']            
                                                                                                  
 up_sampling2d_1 (UpSamplin  (None, 28, 28, 512)          0         ['activation_3[0][0]']        
 g2D)                                                                                             
                                                                                                  
 concatenate_1 (Concatenate  (None, 28, 28, 1024)         0         ['up_sampling2d_1[0][0]',     
 )                                                                   'block4_conv3[1][0]']        
                                                                                                  
 conv2d_4 (Conv2D)           (None, 28, 28, 256)          2359552   ['concatenate_1[0][0]']       
                                                                                                  
 activation_4 (Activation)   (None, 28, 28, 256)          0         ['conv2d_4[0][0]']            
                                                                                                  
 conv2d_5 (Conv2D)           (None, 28, 28, 256)          590080    ['activation_4[0][0]']        
                                                                                                  
 activation_5 (Activation)   (None, 28, 28, 256)          0         ['conv2d_5[0][0]']            
                                                                                                  
 up_sampling2d_2 (UpSamplin  (None, 56, 56, 256)          0         ['activation_5[0][0]']        
 g2D)                                                                                             
                                                                                                  
 concatenate_2 (Concatenate  (None, 56, 56, 512)          0         ['up_sampling2d_2[0][0]',     
 )                                                                   'block3_conv3[1][0]']        
                                                                                                  
 conv2d_6 (Conv2D)           (None, 56, 56, 128)          589952    ['concatenate_2[0][0]']       
                                                                                                  
 activation_6 (Activation)   (None, 56, 56, 128)          0         ['conv2d_6[0][0]']            
                                                                                                  
 conv2d_7 (Conv2D)           (None, 56, 56, 128)          147584    ['activation_6[0][0]']        
                                                                                                  
 activation_7 (Activation)   (None, 56, 56, 128)          0         ['conv2d_7[0][0]']            
                                                                                                  
 up_sampling2d_3 (UpSamplin  (None, 112, 112, 128)        0         ['activation_7[0][0]']        
 g2D)                                                                                             
                                                                                                  
 concatenate_3 (Concatenate  (None, 112, 112, 256)        0         ['up_sampling2d_3[0][0]',     
 )                                                                   'block2_conv2[1][0]']        
                                                                                                  
 conv2d_8 (Conv2D)           (None, 112, 112, 64)         147520    ['concatenate_3[0][0]']       
                                                                                                  
 activation_8 (Activation)   (None, 112, 112, 64)         0         ['conv2d_8[0][0]']            
                                                                                                  
 conv2d_9 (Conv2D)           (None, 112, 112, 64)         36928     ['activation_8[0][0]']        
                                                                                                  
 activation_9 (Activation)   (None, 112, 112, 64)         0         ['conv2d_9[0][0]']            
                                                                                                  
 up_sampling2d_4 (UpSamplin  (None, 224, 224, 64)         0         ['activation_9[0][0]']        
 g2D)                                                                                             
                                                                                                  
 concatenate_4 (Concatenate  (None, 224, 224, 128)        0         ['up_sampling2d_4[0][0]',     
 )                                                                   'block1_conv2[1][0]']        
                                                                                                  
 conv2d_10 (Conv2D)          (None, 224, 224, 16)         18448     ['concatenate_4[0][0]']       
                                                                                                  
 activation_10 (Activation)  (None, 224, 224, 16)         0         ['conv2d_10[0][0]']           
                                                                                                  
 conv2d_11 (Conv2D)          (None, 224, 224, 5)          85        ['activation_10[0][0]']       
                                                                                                  
==================================================================================================
Total params: 35122469 (133.98 MB)
Trainable params: 22767589 (86.85 MB)
Non-trainable params: 12354880 (47.13 MB)
__________________________________________________________________________________________________

plot model

Data Generator¶

Class implementation¶

In [ ]:
import numpy as np
import keras
import os
from PIL import Image
import cv2

class DataGenerator(keras.utils.Sequence):
    def __init__(self, base_path, data, batch_size=32, dim=(240, 360), n_channels=3, n_classes=5, shuffle=True):
        self.dim = dim
        self.batch_size = batch_size
        self.data = data
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.base_path = base_path
        self.indexes = np.array(1)
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.data) / self.batch_size))

    def __getitem__(self, index):
       if self.shuffle:
         indexes = np.random.default_rng().choice(len(self.data), self.batch_size, replace=False)
         i = np.random.randint(len(self.data)-self.batch_size)
       else:
         indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
       data_temp = [self.data[i] for i in indexes]
       x, y = self.__data_generation(data_temp)
       return x, y

    def __getitem__(self, index):
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        data_temp = [self.data[i] for i in indexes]
        x, y = self.__data_generation(data_temp)
        return x, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.data))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, data):
        x = np.empty((self.batch_size, self.dim[0], self.dim[1], self.n_channels), dtype=np.float32)
        y = np.empty((self.batch_size, self.dim[0], self.dim[1], self.n_channels), dtype=np.float32)
        for i, file_name in enumerate(data):
          image_path = os.path.join(self.base_path, 'data', file_name)
          seg_path = os.path.join(self.base_path, 'sem_seg', file_name)
          try:
              img_data = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
              index_for_square_cropping = np.random.randint(840)
              img_data = img_data[:, index_for_square_cropping : index_for_square_cropping+1080, :]
              img_data = cv2.resize(img_data, (self.dim[1], self.dim[0]), interpolation = cv2.INTER_NEAREST)
              x[i] = np.array(img_data)
              img_seg = cv2.imread(seg_path)
              img_seg = img_seg[:, index_for_square_cropping : index_for_square_cropping+1080, :]
              img_seg = cv2.resize(img_seg, (self.dim[1], self.dim[0]), interpolation = cv2.INTER_NEAREST)
              y[i] = np.array(img_seg)
          except Exception as e:
              print(f"Error loading {image_path}: {str(e)}")

        x = np.array(x, dtype=np.float32)
        y = create_y_labels(y, np.zeros((self.batch_size, self.dim[0], self.dim[1], self.n_classes), dtype=np.float32))
        return x, y

Load image file information including paths¶

In [ ]:
import json
import cv2
import numpy as np

def load_and_extract_ids(folder_path, data_type):
    file_path = f"{folder_path}/{data_type}/labels.json"

    with open(file_path) as file:
        data_loader = json.load(file)
    images = data_loader["images"]
    print(len(images))
    image_paths = []
    for i in range(0,len(images)): #len(images)
      image_paths.append(images[i]['file_name'])
    return image_paths

train_ids = load_and_extract_ids(path_folder, "train")
validation_ids = load_and_extract_ids(path_folder, "val")
test_ids = load_and_extract_ids(path_folder, "test")
3002
572
929

Create Data Generator Instance¶

In [ ]:
params = {'dim': input_dim,
          'batch_size': 32,
          'n_classes': 5,
          'n_channels': 3,
          'shuffle': True}

partition = {
    'train': train_ids,
    'val': validation_ids,
    'test': test_ids
}


training_generator = DataGenerator(base_path = path_folder + 'train/', data = partition['train'], **params)
validation_generator = DataGenerator(base_path = path_folder + 'val/', data = partition['val'], **params)
test_generator = DataGenerator(base_path = path_folder + 'test/', data = partition['test'], **params)

Training¶

Learning rate and fitting

In [ ]:
def our_learning_rate(epoch, learning_rate):
  if epoch == 0:
    return 0.01
  return learning_rate * tf.math.exp(-0.10)

callback = keras.callbacks.LearningRateScheduler(our_learning_rate)
history = model.fit(training_generator, validation_data=validation_generator, epochs=20, callbacks = [callback], verbose=1)
Epoch 1/20
2023-12-07 12:52:32.590594: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
93/93 [==============================] - 254s 3s/step - loss: 0.7412 - val_loss: 0.7119 - lr: 0.0100
Epoch 2/20
93/93 [==============================] - 238s 3s/step - loss: 0.7105 - val_loss: 0.6999 - lr: 0.0090
Epoch 3/20
93/93 [==============================] - 238s 3s/step - loss: 0.7001 - val_loss: 0.6891 - lr: 0.0082
Epoch 4/20
93/93 [==============================] - 231s 2s/step - loss: 0.6890 - val_loss: 0.6772 - lr: 0.0074
Epoch 5/20
93/93 [==============================] - 229s 2s/step - loss: 0.6728 - val_loss: 0.6627 - lr: 0.0067
Epoch 6/20
93/93 [==============================] - 228s 2s/step - loss: 0.6582 - val_loss: 0.6488 - lr: 0.0061
Epoch 7/20
93/93 [==============================] - 229s 2s/step - loss: 0.6502 - val_loss: 0.6401 - lr: 0.0055
Epoch 8/20
93/93 [==============================] - 229s 2s/step - loss: 0.6422 - val_loss: 0.6378 - lr: 0.0050
Epoch 9/20
93/93 [==============================] - 229s 2s/step - loss: 0.6390 - val_loss: 0.6286 - lr: 0.0045
Epoch 10/20
93/93 [==============================] - 229s 2s/step - loss: 0.6333 - val_loss: 0.6269 - lr: 0.0041
Epoch 11/20
93/93 [==============================] - 229s 2s/step - loss: 0.6290 - val_loss: 0.6255 - lr: 0.0037
Epoch 12/20
93/93 [==============================] - 229s 2s/step - loss: 0.6271 - val_loss: 0.6232 - lr: 0.0033
Epoch 13/20
93/93 [==============================] - 228s 2s/step - loss: 0.6239 - val_loss: 0.6219 - lr: 0.0030
Epoch 14/20
93/93 [==============================] - 229s 2s/step - loss: 0.6193 - val_loss: 0.6175 - lr: 0.0027
Epoch 15/20
93/93 [==============================] - 228s 2s/step - loss: 0.6160 - val_loss: 0.6157 - lr: 0.0025
Epoch 16/20
93/93 [==============================] - 229s 2s/step - loss: 0.6151 - val_loss: 0.6180 - lr: 0.0022
Epoch 17/20
93/93 [==============================] - 229s 2s/step - loss: 0.6107 - val_loss: 0.6106 - lr: 0.0020
Epoch 18/20
93/93 [==============================] - 228s 2s/step - loss: 0.6137 - val_loss: 0.6137 - lr: 0.0018
Epoch 19/20
93/93 [==============================] - 228s 2s/step - loss: 0.6073 - val_loss: 0.6111 - lr: 0.0017
Epoch 20/20
93/93 [==============================] - 227s 2s/step - loss: 0.6058 - val_loss: 0.6104 - lr: 0.0015

Plot¶

In [ ]:
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Loss function')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
dict_keys(['loss', 'val_loss', 'lr'])
No description has been provided for this image

Test¶

In [ ]:
x_test, y_test = test_generator[0]
for i in range(1, 5):
  print(i)
  x_temp, y_temp = test_generator[i]
  x_test, y_test = np.vstack((x_test,x_temp)), np.vstack((y_test, y_temp))
print(x_test.shape)
print(y_test.shape)

pred = model.predict(x_test)
1
2
3
4
(160, 224, 224, 3)
(160, 224, 224, 5)
5/5 [==============================] - 4s 843ms/step

Dice coefficient¶

In [ ]:
print("Dice coefficient: \t" +  '{:.4}'.format(dice_coef(y_test, pred)))
print("Dice loss function: \t" + '{:.4}'.format(dice_loss_function(y_test, pred)))
print(dice_coef(y_test, y_test))
def dice_coef2(y, pred):
  coef = 0
  for i in range(y.shape[3]):
    intersection = np.sum(y[:,:,:,i] * pred[:,:,:,i])
    set_size = np.sum(y[:,:,:,i]) + np.sum(pred[:,:,:,i])
    coef += (2*intersection)/set_size
  return coef/5.0

print(dice_coef2(y_test, pred))
print(dice_loss_function(tf.constant(y_test), tf.constant(pred)))
Dice coefficient: 	0.8484
Dice loss function: 	0.6047
tf.Tensor(1.0, shape=(), dtype=float64)
0.39527631684334447
tf.Tensor(0.6047237687988212, shape=(), dtype=float64)

IoU¶

In [ ]:
class_names = ["background", "rigid_plastic", "cardboard", "metal", "soft_plastic"]
def mean_intersection_over_union(y, pred):
    result = 0
    for i in range(num_classes):
        intersection = np.sum(y[:,:,:,i]*pred[:,:,:,i])
        union = np.sum(y[:,:,:,i]) + np.sum(pred[:,:,:,i]) - intersection
        print("IoU of " + class_names[i] + " is " + '{:.4}'.format(100*intersection/union))
        result += intersection/union
    return result/num_classes
print("mean intersection over union is " + '{:.4}'.format(100*mean_intersection_over_union(y_test, pred)))
IoU of background is 84.75
IoU of rigid_plastic is 3.132
IoU of cardboard is 46.18
IoU of metal is 1.585
IoU of soft_plastic is 20.13
mean intersection over union is 31.16

Percentage of pixels categorized correctly¶

In [ ]:
# Calculation of the percentage of pixels that are categorized correctly
y_pred_binary = np.argmax(pred, axis=3).flatten()
correct_predictions2 = np.sum(np.equal(y_pred_binary, np.argmax(y_test, axis=3).flatten()))
n, h, w, _ = y_test.shape
pixels = n*h*w
accuracy = correct_predictions2 / pixels * 100.0
print("acuracy is "  + '{:.4}'.format(accuracy))
acuracy is 84.84

Confusion Matrix¶

In [ ]:
from sklearn.metrics import confusion_matrix

#See lab 4 for explanation of Confusion Matrix
pred_flat = np.array([])
image_labels_flat = np.array([])
print(pred.shape)
for i in range(pred.shape[0]):
  image_predictions = np.argmax(pred[i], axis=2).flatten()
  pred_flat = np.hstack([pred_flat,image_predictions])
  img_labels = np.argmax(y_test[i], axis=2).flatten()
  image_labels_flat = np.hstack([image_labels_flat, img_labels])


image_labels_flat = np.hstack([image_labels_flat, np.arange(5)])
pred_flat = np.hstack([pred_flat, np.arange(5)])
conf_matrix = confusion_matrix(image_labels_flat, pred_flat)
print("1: background 2: rigid_plastic 3: cardboard 4: metal 5: soft_plastic\n")
print("Confusion Matrix")
print(conf_matrix-np.eye(5, dtype=np.int64))

np.set_printoptions(precision=2)
conf_matrix_norm = ((conf_matrix.T-np.eye(5, dtype=np.int64)) / np.sum(conf_matrix, axis=1)).T
print("\nNormalized Confusion Matrix")
print(np.round(conf_matrix_norm,3)*100)
(160, 224, 224, 5)
1: background 2: rigid_plastic 3: cardboard 4: metal 5: soft_plastic

Confusion Matrix
[[6106047   45831  262019    9104  144972]
 [  54482    4644    5560     483   15100]
 [ 324160    2599  571878    1884   10010]
 [   3578     258      54     296     503]
 [ 254889   19204   60135    2566  127904]]

Normalized Confusion Matrix
[[93.   0.7  4.   0.1  2.2]
 [67.9  5.8  6.9  0.6 18.8]
 [35.6  0.3 62.8  0.2  1.1]
 [76.3  5.5  1.2  6.3 10.7]
 [54.9  4.1 12.9  0.6 27.5]]

Precision and Recall¶

In [ ]:
show_interesting_facts = False
if show_interesting_facts:
  print("------------------------------ interesting facts ----------------------------------------")
  print("percentage of pixels that are cardboard in training set: " + '{:.4}'.format(100*np.sum(conf_matrix[2,:])/np.sum(conf_matrix)))
  print("percentage of pixels that are background in training set: " + '{:.4}'.format(100*np.sum(conf_matrix[0,:])/np.sum(conf_matrix)) + " and accuracy is " + '{:.4}'.format(accuracy))
  print("Therefore the model is "  + '{:.4}'.format(accuracy - 100*np.sum(conf_matrix[0,:])/np.sum(conf_matrix)) + " percentage points better in accuracy than always guessing background")
  print("\npercentage of time the model guesses background: " + '{:.4}'.format(100*np.sum(conf_matrix[:,0])/np.sum(conf_matrix)))
  print("percentage of time the model guesses cardboard: " + '{:.4}'.format(100*np.sum(conf_matrix[:,2])/np.sum(conf_matrix)))

  print("\n----------------------------------------------------------------------------------------")
classes = ["background", "rigid plastic", "cardboard", "metal\t", "soft plastic"]
print("Precision with regards to each class:\n")
conf_matrix = conf_matrix - np.eye(5, dtype=np.int64)
for i in range(len(classes)):
  if np.sum(conf_matrix[:,i]) == 0:
    print( classes[i] + ": \t" + "no predictions")
  else:
    precision = 100*conf_matrix[i,i]/np.sum(conf_matrix[:,i])
    print( classes[i] + ": \t" + '{:.4}'.format(precision))

# This is also just the diagonal of the normalized confusion matrix
print("\nRecall with regards to each class:\n")
for i in range(len(classes)):
  if np.sum(conf_matrix[i,:]) == 0:
    print(classes[i] + ": \t" + "no predictions")
  else:
    recall = 100*conf_matrix[i,i]/np.sum(conf_matrix[i,:])
    print(classes[i] + ": \t" + '{:.4}'.format(recall))
Precision with regards to each class:

background: 	90.55
rigid plastic: 	6.402
cardboard: 	63.57
metal	: 	2.065
soft plastic: 	42.85

Recall with regards to each class:

background: 	92.97
rigid plastic: 	5.786
cardboard: 	62.81
metal	: 	6.313
soft plastic: 	27.52

Visual inspection¶

In [ ]:
for i in range(0, pred.shape[0], 10):
  print("------------------------------------------------------------\nFor test imgages #" + str(i+1))
  Make_Overlapping_Plot2(x_test[i].astype(np.uint8), y_test[i], pred=pred[i])
------------------------------------------------------------
For test imgages #1
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #11
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #21
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #31
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #41
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #51
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #61
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #71
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #81
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #91
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #101
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #111
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #121
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #131
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #141
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
------------------------------------------------------------
For test imgages #151
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image